import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd
import numpy as np
import pickle

# Load the ATP match table; the directory holding the CSV is the first
# command-line argument.
csv_path = os.path.join(sys.argv[1], 'atp_tennis.csv')
atp_tennis = pd.read_csv(csv_path)

# Select every match that lists Federer as either participant, then record
# the row labels of those matches for later lookups.
involves_federer = atp_tennis['Player_1'].eq('Federer R.') | atp_tennis['Player_2'].eq('Federer R.')
federer_matches = atp_tennis[involves_federer]
federer_match_ids = federer_matches.index.tolist()

print(federer_match_ids)
# pickle.dump(federer_match_ids,open("./ref_result/federer_match_ids.pkl","wb"))

import pandas as pd
import pickle


# Re-select Federer's matches from the loaded table so this step only
# depends on `atp_tennis`.
is_federer_match = atp_tennis['Player_1'].eq('Federer R.') | atp_tennis['Player_2'].eq('Federer R.')
federer_matches = atp_tennis[is_federer_match]

# For each match take the participant who is not Federer: Player_1, unless
# Player_1 is Federer, in which case Player_2. Duplicates are collapsed.
first_listed = federer_matches['Player_1']
federer_opponents = first_listed.mask(first_listed == 'Federer R.', federer_matches['Player_2']).unique()

print(federer_opponents)
# pickle.dump(federer_opponents,open("./ref_result/federer_opponents.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


def calculate_performance(player, opponent, data, surface_types):
    """Summarise how ``player`` fared against ``opponent`` in ``data``.

    Matches are the rows where the two names occupy ``Player_1`` and
    ``Player_2`` in either order; a win is a row whose ``Winner`` equals
    ``player``.

    Args:
        player: Name of the player whose results are measured.
        opponent: Name of the other participant.
        data: DataFrame with ``Player_1``, ``Player_2``, ``Winner`` and
            ``Surface`` columns.
        surface_types: Iterable of surface labels to break the results down by.

    Returns:
        A tuple ``(win_rate, h2h_record, surface_performance)``. The first two
        are the same wins-over-matches ratio (0 when the pair never met);
        ``surface_performance`` maps each surface label to the ratio on that
        surface (0 when no matches were played on it).
    """
    def _ratio(matches):
        # Fraction of the given matches won by `player`; 0 for an empty set.
        played = len(matches)
        if played == 0:
            return 0
        won = len(matches[matches['Winner'] == player])
        return won / played

    listed_first = (data['Player_1'] == player) & (data['Player_2'] == opponent)
    listed_second = (data['Player_1'] == opponent) & (data['Player_2'] == player)
    meetings = data[listed_first | listed_second]

    # Win rate and head-to-head record are computed from the same set of
    # meetings, so they share one value.
    overall = _ratio(meetings)

    surface_performance = {
        surface: _ratio(meetings[meetings['Surface'] == surface])
        for surface in surface_types
    }

    return overall, overall, surface_performance


import pandas as pd
import numpy as np
import pickle


# Surface labels present in the data; one performance column is created per label.
surface_types = atp_tennis['Surface'].unique()

# Column layout of the result table.
columns = ['Player', 'Opponent', 'Win Rate', 'Head-to-Head'] + [f'{surface} Surface Performance' for surface in surface_types]

# Evaluate Raonic M. against every opponent Federer faced. Rows are collected
# in a list and the frame is built once afterwards: concatenating inside the
# loop copies the whole frame on every iteration and starts from an empty
# frame, which pandas has deprecated for concat.
stat_rows = []
for opponent in federer_opponents:
    win_rate, h2h_record, surface_performance = calculate_performance('Raonic M.', opponent, atp_tennis, surface_types)

    row_data = {'Player': 'Raonic M.', 'Opponent': opponent, 'Win Rate': win_rate, 'Head-to-Head': h2h_record}
    row_data.update({f'{surface} Surface Performance': surface_performance[surface] for surface in surface_types})
    stat_rows.append(row_data)

# `columns=` fixes the column order and keeps the header when there are no rows.
player_stats = pd.DataFrame(stat_rows, columns=columns)

# Display the resulting dataframe
print(player_stats)
# pickle.dump(player_stats,open("./ref_result/player_stats.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


def calculate_head_to_head_win_rate(data):
    """Build a win-rate table for every ordered pair of distinct players.

    Args:
        data: DataFrame with ``Player_1``, ``Player_2`` and ``Winner`` columns.

    Returns:
        DataFrame with columns ``Player``, ``Opponent`` and
        ``Head-to-Head Win Rate``, one row per ordered pair of distinct names
        found in ``Player_1``/``Player_2``. The rate is the number of rows
        where ``Player`` is listed as ``Player_1`` against ``Opponent`` and
        won, divided by the number of rows with that same listing. Pairs with
        no such rows get a rate of 0.

    NOTE(review): rows where ``Player`` is listed as ``Player_2`` are not
    counted for that player's pair -- confirm whether both listing orders
    should contribute to a head-to-head figure.
    """
    unique_players = pd.concat([data['Player_1'], data['Player_2']]).unique()

    pair_keys = ['Player_1', 'Player_2']
    total_matches = data.groupby(pair_keys).size().reset_index(name='count_total')
    first_listed_won = data[data['Winner'] == data['Player_1']]
    player_wins = first_listed_won.groupby(pair_keys).size().reset_index(name='count_wins')

    # Every ordered (Player, Opponent) combination, excluding self-pairs.
    all_pairs = pd.MultiIndex.from_product([unique_players, unique_players], names=['Player', 'Opponent']).to_frame(index=False)
    all_pairs = all_pairs[all_pairs['Player'] != all_pairs['Opponent']]

    h2h_win_rates = all_pairs.merge(total_matches, left_on=['Player', 'Opponent'], right_on=pair_keys, how='left').drop(columns=pair_keys)
    h2h_win_rates = h2h_win_rates.merge(player_wins, left_on=['Player', 'Opponent'], right_on=pair_keys, how='left').drop(columns=pair_keys)

    # Assign the filled columns back instead of calling fillna(inplace=True)
    # on a column selection: that chained in-place form warns in pandas 2.x
    # and leaves the frame unchanged under Copy-on-Write.
    h2h_win_rates['count_wins'] = h2h_win_rates['count_wins'].fillna(0)
    # A missing total becomes 1 so the division below yields 0 rather than NaN.
    h2h_win_rates['count_total'] = h2h_win_rates['count_total'].fillna(1)
    h2h_win_rates['Head-to-Head Win Rate'] = h2h_win_rates['count_wins'] / h2h_win_rates['count_total']

    return h2h_win_rates.drop(columns=['count_total', 'count_wins'])


import pandas as pd
import numpy as np
import pickle


def estimate_probability(player_1_win_rate, player_2_win_rate, h2h_player_1_win_rate, h2h_player_2_win_rate, performance_weight=0.7, h2h_weight=0.3):
    """Blend win rates and head-to-head rates into a pair of win probabilities.

    Args:
        player_1_win_rate: Win rate attributed to player 1.
        player_2_win_rate: Win rate attributed to player 2.
        h2h_player_1_win_rate: Head-to-head rate attributed to player 1.
        h2h_player_2_win_rate: Head-to-head rate attributed to player 2.
        performance_weight: Multiplier applied to the win rates.
        h2h_weight: Multiplier applied to the head-to-head rates.

    Returns:
        ``(player_1_probability, player_2_probability)``: each player's
        weighted score divided by the sum of both scores. When that sum is
        zero (for example both weights are 0) an even ``(0.5, 0.5)`` split is
        returned.
    """
    # A pair of rates that sums to zero carries no information; treat it as even.
    if player_1_win_rate + player_2_win_rate == 0:
        player_1_win_rate = player_2_win_rate = 0.5

    if h2h_player_1_win_rate + h2h_player_2_win_rate == 0:
        h2h_player_1_win_rate = h2h_player_2_win_rate = 0.5

    weighted_player_1_win_rate = performance_weight * player_1_win_rate + h2h_weight * h2h_player_1_win_rate
    weighted_player_2_win_rate = performance_weight * player_2_win_rate + h2h_weight * h2h_player_2_win_rate
    total_weighted_win_rate = weighted_player_1_win_rate + weighted_player_2_win_rate

    # Guard the normalisation: a zero total would otherwise raise
    # ZeroDivisionError (or produce NaN for numpy scalars).
    if total_weighted_win_rate == 0:
        return 0.5, 0.5

    player_1_probability = weighted_player_1_win_rate / total_weighted_win_rate
    player_2_probability = weighted_player_2_win_rate / total_weighted_win_rate

    return player_1_probability, player_2_probability


import pandas as pd
import numpy as np
import pickle


# Function to update odds based on performance weight and head-to-head weight
def update_odds(match_id, performance_weight, h2h_weight, data, h2h_win_rates):
    """Recompute the odds of one match from win rates and head-to-head rates.

    Win rates are read from the module-level ``player_stats`` frame (rows
    selected by its ``Opponent`` column); head-to-head rates come from
    ``h2h_win_rates``. Both are combined by ``estimate_probability``.

    Args:
        match_id: Row label of the match in ``data``.
        performance_weight: Weight given to the win rates.
        h2h_weight: Weight given to the head-to-head rates.
        data: DataFrame with ``Player_1``, ``Player_2``, ``Odd_1`` and
            ``Odd_2`` columns.
        h2h_win_rates: DataFrame with ``Player``, ``Opponent`` and
            ``Head-to-Head Win Rate`` columns.

    Returns:
        ``(updated_odd_1, updated_odd_2)``: the reciprocal of each player's
        estimated probability. A probability of exactly 0 falls back to the
        maximum of the corresponding odds column in ``data``.
    """
    def _first_or_zero(values):
        # First value of a lookup result, or 0 when nothing matched.
        return values.iloc[0] if not values.empty else 0

    match = data.loc[match_id]
    player_1 = match['Player_1']
    player_2 = match['Player_2']

    player_1_win_rate = _first_or_zero(player_stats.loc[player_stats['Opponent'] == player_1, 'Win Rate'])
    player_2_win_rate = _first_or_zero(player_stats.loc[player_stats['Opponent'] == player_2, 'Win Rate'])

    # Select the row for this exact (Player, Opponent) pair. Filtering on
    # Opponent alone returns the first row of *any* player who faced that
    # opponent, which is unrelated to the match being priced.
    h2h_player = h2h_win_rates['Player']
    h2h_opponent = h2h_win_rates['Opponent']
    h2h_player_1_win_rate = _first_or_zero(
        h2h_win_rates.loc[(h2h_player == player_1) & (h2h_opponent == player_2), 'Head-to-Head Win Rate']
    )
    h2h_player_2_win_rate = _first_or_zero(
        h2h_win_rates.loc[(h2h_player == player_2) & (h2h_opponent == player_1), 'Head-to-Head Win Rate']
    )

    player_1_probability, player_2_probability = estimate_probability(player_1_win_rate, player_2_win_rate, h2h_player_1_win_rate, h2h_player_2_win_rate, performance_weight, h2h_weight)

    # The column maxima are only needed for the zero-probability fallback, so
    # they are computed lazily instead of scanning both columns on every call.
    if player_1_probability == 0:
        updated_odd_1 = data['Odd_1'].max()
    else:
        updated_odd_1 = 1 / player_1_probability

    if player_2_probability == 0:
        updated_odd_2 = data['Odd_2'].max()
    else:
        updated_odd_2 = 1 / player_2_probability

    return updated_odd_1, updated_odd_2



import pandas as pd
import numpy as np
import pickle


# Calculate head-to-head win rates
h2h_win_rates = calculate_head_to_head_win_rate(atp_tennis)
print(h2h_win_rates)

# Recompute the odds for every affected match. Rows are gathered in a list and
# the frame is built once afterwards; concatenating inside the loop copies the
# whole frame on each iteration and starts from an empty frame, which pandas
# has deprecated for concat.
odds_rows = []
for match_id in federer_match_ids:
    # Update the odds for the match
    updated_odd_1, updated_odd_2 = update_odds(match_id, 0.7, 0.3, atp_tennis, h2h_win_rates)

    original_odd_1 = atp_tennis.loc[match_id, 'Odd_1']
    original_odd_2 = atp_tennis.loc[match_id, 'Odd_2']
    odds_rows.append({
        'Match ID': match_id,
        'Original Odd_1': original_odd_1,
        'Original Odd_2': original_odd_2,
        'Updated Odd_1': updated_odd_1,
        'Updated Odd_2': updated_odd_2,
    })

# `columns=` fixes the column order and keeps the header when there are no rows.
updated_odds_df = pd.DataFrame(odds_rows, columns=['Match ID', 'Original Odd_1', 'Original Odd_2', 'Updated Odd_1', 'Updated Odd_2'])

# Display the resulting DataFrame
print(updated_odds_df)

# Write through a context manager so the file handle is flushed and closed
# even if pickling fails part-way.
with open("./ref_result/updated_odds_df.pkl", "wb") as pickle_file:
    pickle.dump(updated_odds_df, pickle_file)